# Download data
url <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE137001&format=file&file=GSE137001%5FTPM%5Freprogramming%5Fintermediates%2Exlsx"
filename <- "GSE137001_TPM_reprogramming_intermediates.xlsx"
if (!file.exists(filename)) {
download.file(url, filename)
}
tpms <- read_excel(filename)
tpms_mat <- tpms %>% select(3:41) %>% as.matrix()
metadata <- data.frame(sample_name = colnames(tpms_mat)) %>%
mutate(group = gsub("(d\\d+_|_R\\d)", "", sample_name)) %>%
mutate(days = case_when(
startsWith(sample_name, "d") ~ sub("d(\\d+).+", "\\1", sample_name),
TRUE ~ "NA"
)) %>%
mutate(rep = gsub(".+R(\\d)", "\\1", sample_name)) %>%
column_to_rownames("sample_name")
PCA
# I don't think this transformation is correct
trans_mat <- log10(1 + (tpms_mat %>% t()))
pca_res <- stats::prcomp(trans_mat)
pca_df <- pca_res$x %>%
as.data.frame() %>%
rownames_to_column("sample_name") %>%
mutate(group = gsub("(d\\d+_|_R\\d)", "", sample_name)) %>%
mutate(days = case_when(
startsWith(sample_name, "d") ~ sub("d(\\d+).+", "\\1", sample_name),
TRUE ~ "NA"
))
pca_df %>%
filter(group %in% c("SKM", "OSKM")) %>%
ggplot(aes(
x = PC1, y = PC2, color = days, shape = group
)) +
geom_point(size = 3)

# Get gene list to filter
stress_pathways <- c("GO:0036500") # Just the ATF6 pathway
# stress_pathways <- c("GO:0009267", "GO:0034198", "GO:0036003", "GO:0036500", "GO:1990928", "GO:0034599", "GO:0034976", "GO:0016236", "GO:0010506")
gostres <- gprofiler2::gost(stress_pathways)
stress_ensg <- gostres$meta$genes_metadata$query$query_1$ensgs
ens2sym <- AnnotationDbi::select(
EnsDb.Hsapiens.v86,
keys = keys(EnsDb.Hsapiens.v86),
columns = c("SYMBOL")
)
stress_genes <- ens2sym %>%
filter(GENEID %in% stress_ensg) %>%
pull(SYMBOL)
# Filter data for selected stress genes
tpms$Name <- unlist(lapply(tpms$Name, toupper)) # Capitalize gene names
cat("% stress genes in dataset:", 100*mean(stress_genes %in% tpms$Name))
% stress genes in dataset: 100
stress_tpms <- tpms %>% filter(Name %in% stress_genes)
# Select only the SKM and OSKM samples
# Scale the TPMs of each gene to be between 0 and 1
# Note: I couldn't figure out how to scale row-wise, so I transposed instead
scaled_tpms <- stress_tpms[3:41] %>%
select(contains("SKM_R")) %>%
t() %>%
as.data.frame() %>%
mutate_all(scales::rescale) %>% # Min-max scaling to be between 0 and 1
t() %>%
as_tibble() %>%
mutate(Gene = stress_tpms$Name) %>%
relocate(Gene)
# Format data from wide to long for plotting
df <- scaled_tpms %>%
pivot_longer(
contains("_"),
names_to = c("Day", "Factors", "Rep"),
names_pattern = "d(\\d+)_(\\w+)_R(\\d+)"
) %>%
rename(scaled_TPM = value) %>%
mutate_if(is.character, as.factor)
Boxplot
df %>%
ggplot(aes(x = Day, y = scaled_TPM)) +
stat_boxplot(geom ='errorbar', width = 0.5) +
geom_boxplot() +
facet_wrap(~Factors) +
geom_jitter(color = "blue", width = 0.05)

Lineplot
ggp <- df %>%
ggplot(aes(x = Day, y = scaled_TPM, group = Gene)) +
geom_line(aes(color = Gene)) +
facet_wrap(~Factors + Rep)
ggplotly(ggp)
res <- aov(scaled_TPM ~ Day, data = df %>% filter(Factors == "OSKM"))
summary(res)
Df Sum Sq Mean Sq F value Pr(>F)
Day 3 1.946 0.6487 7.727 0.000143 ***
Residuals 76 6.380 0.0840
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res <- aov(scaled_TPM ~ Day, data = df %>% filter(Factors == "SKM"))
summary(res)
Df Sum Sq Mean Sq F value Pr(>F)
Day 3 1.369 0.4562 6.295 0.000715 ***
Residuals 76 5.507 0.0725
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
LS0tCnRpdGxlOiAiQXZlcmFnZSBUUE0gQWNyb3NzIFRpbWUgb2YgU2VsZWN0IEdlbmVzIChHU0UxMzcwMDEpIgphdXRob3I6ICJKYW1lcyBEYW8iCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZm9sZGluZzogaGlkZQogICAgdGhlbWU6IGNlcnVsZWFuCi0tLQoKYGBge3IgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShyZWFkeGwpCmxpYnJhcnkoRW5zRGIuSHNhcGllbnMudjg2KQpsaWJyYXJ5KERFU2VxMikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocGxvdGx5KQpgYGAKCgpgYGB7cn0KIyBEb3dubG9hZCBkYXRhCnVybCA8LSAiaHR0cHM6Ly93d3cubmNiaS5ubG0ubmloLmdvdi9nZW8vZG93bmxvYWQvP2FjYz1HU0UxMzcwMDEmZm9ybWF0PWZpbGUmZmlsZT1HU0UxMzcwMDElNUZUUE0lNUZyZXByb2dyYW1taW5nJTVGaW50ZXJtZWRpYXRlcyUyRXhsc3giCmZpbGVuYW1lIDwtICJHU0UxMzcwMDFfVFBNX3JlcHJvZ3JhbW1pbmdfaW50ZXJtZWRpYXRlcy54bHN4IgppZiAoIWZpbGUuZXhpc3RzKGZpbGVuYW1lKSkgewogIGRvd25sb2FkLmZpbGUodXJsLCBmaWxlbmFtZSkKfQp0cG1zIDwtIHJlYWRfZXhjZWwoZmlsZW5hbWUpCmBgYAoKCmBgYHtyfQp0cG1zX21hdCA8LSB0cG1zICU+JSBzZWxlY3QoMzo0MSkgJT4lIGFzLm1hdHJpeCgpCgptZXRhZGF0YSA8LSBkYXRhLmZyYW1lKHNhbXBsZV9uYW1lID0gY29sbmFtZXModHBtc19tYXQpKSAlPiUKICBtdXRhdGUoZ3JvdXAgPSBnc3ViKCIoZFxcZCtffF9SXFxkKSIsICIiLCBzYW1wbGVfbmFtZSkpICU+JQogIG11dGF0ZShkYXlzID0gY2FzZV93aGVuKAogICAgc3RhcnRzV2l0aChzYW1wbGVfbmFtZSwgImQiKSB+IHN1YigiZChcXGQrKS4rIiwgIlxcMSIsIHNhbXBsZV9uYW1lKSwKICAgIFRSVUUgfiAiTkEiCiAgKSkgJT4lCiAgbXV0YXRlKHJlcCA9IGdzdWIoIi4rUihcXGQpIiwgIlxcMSIsIHNhbXBsZV9uYW1lKSkgJT4lCiAgY29sdW1uX3RvX3Jvd25hbWVzKCJzYW1wbGVfbmFtZSIpCmBgYAoKIyBQQ0EKCmBgYHtyfQojIEkgZG9uJ3QgdGhpbmsgdGhpcyB0cmFuc2Zvcm1hdGlvbiBpcyBjb3JyZWN0CnRyYW5zX21hdCA8LSBsb2cxMCgxICsgKHRwbXNfbWF0ICU+JSB0KCkpKQoKcGNhX3JlcyA8LSBzdGF0czo6cHJjb21wKHRyYW5zX21hdCkKCnBjYV9kZiA8LSBwY2FfcmVzJHggJT4lCiAgYXMuZGF0YS5mcmFtZSgpICU+JQogIHJvd25hbWVzX3RvX2NvbHVtbigic2FtcGxlX25hbWUiKSAlPiUKICBtdXRhdGUoZ3JvdXAgPSBnc3ViKCIoZFxcZCtffF9SXFxkKSIsICIiLCBzYW1wbGVfbmFtZSkpICU+JQogIG11dGF0ZShkYXlzID0gY2FzZV93aGVuKAogICAgc3RhcnRzV2l0aChzYW1wbGVfbmFtZSwgImQiKSB+IHN1YigiZChcXGQrKS4rIiwgIlxcMSIsIHNhbXBsZV9uYW1lKSwKICAgIFRSVUUgfiAiTkEiCiAgKSkKCnBjYV9kZiAlPiUKICBmaWx0ZXIoZ3JvdXAgJWluJSBjKCJTS00iLCAiT1NLTSIpKSAlPiUKICBnZ3Bsb3QoYWVzKAogICAgeCA9IFBDMSwgeSA9IFBDMiwgY29sb3IgPSBkYXlzLCBzaGFwZSA9IGdyb3VwCiAgKSkgKyAKICBnZW9tX3BvaW50KHNpemUgPSAzKQpgYGAKCgoKYGBge3J9CiMgR2V0IGdlbmUgbGlzdCB0byBmaWx0ZXIKc3RyZXNzX3BhdGh3YXlzIDwtIGMoIkdPOjAwMzY1MDAiKSAgIyBKdXN0IHRoZSBBVEY2IHBhdGh3YXkKIyBzdHJlc3NfcGF0aHdheXMgPC0gYygiR086MDAwOTI2NyIsICJHTzowMDM0MTk4IiwgIkdPOjAwMzYwMDMiLCAiR086MDAzNjUwMCIsICJHTzoxOTkwOTI4IiwgIkdPOjAwMzQ1OTkiLCAiR086MDAzNDk3NiIsICJHTzowMDE2MjM2IiwgIkdPOjAwMTA1MDYiKQoKZ29zdHJlcyA8LSBncHJvZmlsZXIyOjpnb3N0KHN0cmVzc19wYXRod2F5cykKc3RyZXNzX2Vuc2cgPC0gZ29zdHJlcyRtZXRhJGdlbmVzX21ldGFkYXRhJHF1ZXJ5JHF1ZXJ5XzEkZW5zZ3MKCmVuczJzeW0gPC0gQW5ub3RhdGlvbkRiaTo6c2VsZWN0KAogIEVuc0RiLkhzYXBpZW5zLnY4NiwKICBrZXlzID0ga2V5cyhFbnNEYi5Ic2FwaWVucy52ODYpLAogIGNvbHVtbnMgPSBjKCJTWU1CT0wiKQopCgpzdHJlc3NfZ2VuZXMgPC0gZW5zMnN5bSAlPiUKICBmaWx0ZXIoR0VORUlEICVpbiUgc3RyZXNzX2Vuc2cpICU+JQogIHB1bGwoU1lNQk9MKQpgYGAKCgpgYGB7cn0KIyBGaWx0ZXIgZGF0YSBmb3Igc2VsZWN0ZWQgc3RyZXNzIGdlbmVzCnRwbXMkTmFtZSA8LSB1bmxpc3QobGFwcGx5KHRwbXMkTmFtZSwgdG91cHBlcikpICAjIENhcGl0YWxpemUgZ2VuZSBuYW1lcwpjYXQoIiUgc3RyZXNzIGdlbmVzIGluIGRhdGFzZXQ6IiwgMTAwKm1lYW4oc3RyZXNzX2dlbmVzICVpbiUgdHBtcyROYW1lKSkKCnN0cmVzc190cG1zIDwtIHRwbXMgJT4lIGZpbHRlcihOYW1lICVpbiUgc3RyZXNzX2dlbmVzKQpgYGAKCgpgYGB7cn0KIyBTZWxlY3Qgb25seSB0aGUgU0tNIGFuZCBPU0tNIHNhbXBsZXMKIyBTY2FsZSB0aGUgVFBNcyBvZiBlYWNoIGdlbmUgdG8gYmUgYmV0d2VlbiAwIGFuZCAxCiMgTm90ZTogSSBjb3VsZG4ndCBmaWd1cmUgb3V0IGhvdyB0byBzY2FsZSByb3ctd2lzZSwgc28gSSB0cmFuc3Bvc2VkIGluc3RlYWQKc2NhbGVkX3RwbXMgPC0gc3RyZXNzX3RwbXNbMzo0MV0gJT4lCiAgc2VsZWN0KGNvbnRhaW5zKCJTS01fUiIpKSAlPiUKICB0KCkgJT4lCiAgYXMuZGF0YS5mcmFtZSgpICU+JQogIG11dGF0ZV9hbGwoc2NhbGVzOjpyZXNjYWxlKSAlPiUgICMgTWluLW1heCBzY2FsaW5nIHRvIGJlIGJldHdlZW4gMCBhbmQgMQogIHQoKSAlPiUKICBhc190aWJibGUoKSAlPiUKICBtdXRhdGUoR2VuZSA9IHN0cmVzc190cG1zJE5hbWUpICU+JQogIHJlbG9jYXRlKEdlbmUpCmBgYAoKCmBgYHtyfQojIEZvcm1hdCBkYXRhIGZyb20gd2lkZSB0byBsb25nIGZvciBwbG90dGluZwpkZiA8LSBzY2FsZWRfdHBtcyAlPiUKICBwaXZvdF9sb25nZXIoCiAgICBjb250YWlucygiXyIpLAogICAgbmFtZXNfdG8gPSBjKCJEYXkiLCAiRmFjdG9ycyIsICJSZXAiKSwKICAgIG5hbWVzX3BhdHRlcm4gPSAiZChcXGQrKV8oXFx3KylfUihcXGQrKSIKICApICU+JQogIHJlbmFtZShzY2FsZWRfVFBNID0gdmFsdWUpICU+JQogIG11dGF0ZV9pZihpcy5jaGFyYWN0ZXIsIGFzLmZhY3RvcikKYGBgCgojIEJveHBsb3QKCmBgYHtyfQpkZiAlPiUKICBnZ3Bsb3QoYWVzKHggPSBEYXksIHkgPSBzY2FsZWRfVFBNKSkgKwogIHN0YXRfYm94cGxvdChnZW9tID0nZXJyb3JiYXInLCB3aWR0aCA9IDAuNSkgKwogIGdlb21fYm94cGxvdCgpICsKICBmYWNldF93cmFwKH5GYWN0b3JzKSArIAogIGdlb21faml0dGVyKGNvbG9yID0gImJsdWUiLCB3aWR0aCA9IDAuMDUpCmBgYAoKIyBMaW5lcGxvdAoKYGBge3IgZmlnLndpZHRoPTh9CmdncCA8LSBkZiAlPiUKICBnZ3Bsb3QoYWVzKHggPSBEYXksIHkgPSBzY2FsZWRfVFBNLCBncm91cCA9IEdlbmUpKSArCiAgZ2VvbV9saW5lKGFlcyhjb2xvciA9IEdlbmUpKSArCiAgZmFjZXRfd3JhcCh+RmFjdG9ycyArIFJlcCkKCmdncGxvdGx5KGdncCkKYGBgCgoKYGBge3J9CnJlcyA8LSBhb3Yoc2NhbGVkX1RQTSB+IERheSwgZGF0YSA9IGRmICU+JSBmaWx0ZXIoRmFjdG9ycyA9PSAiT1NLTSIpKQpzdW1tYXJ5KHJlcykKYGBgCgpgYGB7cn0KcmVzIDwtIGFvdihzY2FsZWRfVFBNIH4gRGF5LCBkYXRhID0gZGYgJT4lIGZpbHRlcihGYWN0b3JzID09ICJTS00iKSkKc3VtbWFyeShyZXMpCmBgYAoKCgoKCgoKCg==